library(tidyverse) # for data cleaning and plotting
library(googlesheets4) # for reading googlesheet data
library(lubridate) # for date manipulation
library(openintro) # for the abbr2state() function
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
library(ggmap) # for mapping points on maps
library(gplots) # for col2hex() function
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
library(leaflet) # for highly customizable mapping
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(shiny) # for creating interactive apps
library(ggimage)
# Skip Google authentication so knitting never prompts for credentials.
gs4_deauth()

# Default ggplot2 theme for every plot in this document.
theme_set(theme_minimal())

# SNCF train data
small_trains <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv"
)

# Lisa's garden data; `date` is parsed with ymd().
garden_harvest <- read_sheet(
  "https://docs.google.com/spreadsheets/d/1DekSazCzKqPS2jnGhKue7tLxRU3GVL1oxi-4bEM5IWw/edit?usp=sharing"
) %>%
  mutate(date = ymd(date))

# Lisa's Mallorca cycling data; keep the first four columns plus `speed`.
mallorca_bike_day7 <- read_csv(
  "https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1"
) %>%
  select(1:4, speed)

# Heather Lendway's Ironman 70.3 Pan Am championships Panama data,
# one file per leg of the race.
panama_swim <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv"
)
panama_bike <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv"
)
panama_run <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv"
)

# COVID-19 data from the New York Times
covid19 <- read_csv(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
Go here or to previous homework to remind yourself how to get set up.
Once your repository is created, you should always open your project rather than just opening an .Rmd file. You can do that either by clicking on the .Rproj file in your repository folder on your computer or by going to the upper right-hand corner of RStudio and clicking the arrow next to where it says Project: (None). You should see your project come up in that list if you’ve used it recently. You could also go to File –> Open Project and navigate to your .Rproj file.
Put your name at the top of the document.
For ALL graphs, you should include appropriate labels.
Feel free to change the default theme, which I currently have set to theme_minimal().
Use good coding practice. Read the short sections on good code with pipes and ggplot2. This is part of your grade!
NEW!! With animated graphs, add eval=FALSE to the code chunk that creates the animation and saves it using anim_save(). Add another code chunk to reread the gif back into the file. See the tutorial for help.
When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don’t do it before then, or else you might miss some important warnings and messages.
ggplotly() function.
# Faceted point-and-line graph of the tomato weight harvested per day for each
# variety, stored so it can be handed to ggplotly() below.
perfect_garden_graph_v3 <- garden_harvest %>%
  # Keep only the tomato rows.
  filter(vegetable == "tomatoes") %>%
  # Express each recorded weight in pounds.
  mutate(weight_in_lbs = weight / 453.592) %>%
  # Total pounds per variety per date; a variety may have been harvested more
  # than once on the same day.
  group_by(variety, date) %>%
  summarize(total_days_weight_lbs = sum(weight_in_lbs)) %>%
  ggplot(aes(x = date, y = total_days_weight_lbs)) +
  # Red points mark each day's total; the connecting line makes the change in
  # weight over time easier to follow.
  geom_point(color = "red") +
  geom_line() +
  # One panel per variety, ordered by the variety's mean daily total.
  facet_wrap(~fct_reorder(variety, total_days_weight_lbs, mean)) +
  labs(
    title = "The Weight of Tomato Varieties Harvested Over Time",
    x = "Date",
    y = "Weight (lbs)"
  ) +
  # The "linedraw" theme makes the points easier to measure visually.
  theme_linedraw()

# Interactive version of the graph.
ggplotly(perfect_garden_graph_v3)
science <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/science.csv"
)

# Keep only the first occupation listed for each scientist: split
# `occupation_s` at ";" and drop everything after the first piece.
new_occupation <- science %>%
  separate(
    occupation_s,
    sep = ";",
    into = "first_occupation",
    extra = "drop"
  )

# Count the scientists in each first occupation, most common first, so the
# top five can be read straight off the head of the table.
new_occupation %>%
  count(first_occupation, sort = TRUE)

# Keep only the five most common first occupations. The list is hard-coded
# from the counts printed above.
topfive_first_occupations <- new_occupation %>%
  filter(
    first_occupation %in% c(
      "Inventor",
      "Chemist",
      "Mathematician",
      "Computer scientist",
      "Physicist"
    )
  )
# Helper used to create the birth_decade variable from birth: rounds a year
# down to the first year of its decade (e.g. 1987 -> 1980).
#
# value: numeric vector of years.
# Returns a numeric vector the same length as `value`; NA stays NA.
floor_decade <- function(value) {
  value - value %% 10
}
# Add birth_decade: each scientist's birth year rounded down to its decade.
topfive_first_occupations <- topfive_first_occupations %>%
  mutate(birth_decade = floor_decade(birth))

# Bar graph of how many scientists were born in each decade, with one facet
# row per occupation.
african_american_scientist_occupations <- topfive_first_occupations %>%
  ggplot(aes(x = birth_decade)) +
  geom_bar(fill = "blue") +
  facet_grid(rows = vars(first_occupation), space = "free") +
  labs(
    title = "Birth Decade vs. Occupation Frequency for African American Scientists",
    x = "Birth Decade",
    y = "Count"
  ) +
  theme_dark()

# Interactive version of the graph.
ggplotly(african_american_scientist_occupations)
small_trains dataset that contains data from the SNCF (National Society of French Railways). These are Tidy Tuesday data! Read more about it here.
# Animated line graph of the monthly number of trips from PARIS EST to NANCY
# in 2018; the line is revealed along the month axis.
small_trains %>%
  filter(
    departure_station == "PARIS EST",
    arrival_station == "NANCY",
    year == 2018
  ) %>%
  ggplot(aes(x = month, y = total_num_trips)) +
  geom_line(color = "darkred") +
  labs(
    title = "Total Number of Trips per Month from PARIS EST to NANCY in 2018",
    subtitle = "Month: {frame_along}",
    x = "Month",
    y = "Trips"
  ) +
  transition_reveal(month)

# Save the animation as a gif, then read the gif back into the document.
anim_save("Trains.gif")
knitr::include_graphics("Trains.gif")
geom_area() examples here). You will look at cumulative harvest of tomato varieties over time. You should do the following:garden_harvest data, filter the data to the tomatoes and find the daily harvest in pounds for each variety.fct_reorder()) from most to least harvested (most on the bottom).
# Animated stacked area graph of the cumulative tomato harvest (in pounds) for
# each variety, revealed along the date axis.
garden_harvest %>%
  filter(vegetable == "tomatoes") %>%
  # Give every variety a row for every day in the observed date range, so the
  # cumulative sums line up across varieties.
  complete(variety, date = seq.Date(min(date), max(date), by = "day")) %>%
  select(-c(vegetable, units)) %>%
  # Days added by complete() have no harvest: treat them as zero.
  mutate(weight = replace_na(weight, 0)) %>%
  group_by(variety, date) %>%
  # Daily total per variety, converted to pounds (the 0.00220462 factor
  # presumably assumes weight is recorded in grams).
  summarize(daily_harvest_lb = sum(weight) * 0.00220462) %>%
  # summarize() peels off the `date` grouping, so this cumsum() runs within
  # each variety.
  mutate(cumsum_daily_harvest_lb = cumsum(daily_harvest_lb)) %>%
  ungroup() %>%
  select(-daily_harvest_lb) %>%
  # Order varieties by total harvest (the maximum of the cumulative sum).
  # Levels ascend from least to most harvested, which is intended to put the
  # most harvested variety at the bottom of the stack.
  mutate(variety = fct_reorder(variety, cumsum_daily_harvest_lb, max)) %>%
  ggplot(aes(x = date, y = cumsum_daily_harvest_lb, fill = variety)) +
  geom_area(position = position_stack()) +
  labs(
    title = "Cumulative Harvest of Tomato Varieties Over Time",
    subtitle = "Date: {frame_along}",
    x = "Date",
    y = "Pounds",
    fill = "Variety"
  ) +
  transition_reveal(date)

# Save the animation as a gif, then read the gif back into the document.
anim_save("CumulativeHarvest.gif")
knitr::include_graphics("CumulativeHarvest.gif")
mallorca_bike_day7 bike ride using animation! Requirements:ggmap.ggimage package and geom_image to add a bike image instead of a red point. You can use this image. See here for an example.
# Image drawn at the rider's position in the animation.
bike_image_link <- "https://raw.githubusercontent.com/llendway/animation_and_interactivity/master/bike.png"

# Store the image link as a column so geom_image() can map it like any other
# aesthetic.
mallorca_bike_day7 <- mallorca_bike_day7 %>%
  mutate(image = bike_image_link)

# Terrain base map covering the ride.
# NOTE(review): Stamen-hosted tiles appear to have been retired, and newer
# ggmap releases may offer get_stadiamap() instead of get_stamenmap() --
# confirm against the installed ggmap version.
mallorca_map <- get_stamenmap(
  bbox = c(left = 2.28, bottom = 39.41, right = 3.03, top = 39.8),
  maptype = "terrain",
  zoom = 11
)

# Animated map: the path is colored by elevation (`ele`) and revealed over
# `time`, with the bike image placed on the data.
ggmap(mallorca_map) +
  geom_path(
    data = mallorca_bike_day7,
    aes(x = lon, y = lat, color = ele),
    size = .5
  ) +
  geom_image(
    data = mallorca_bike_day7,
    # Map the `image` column created above instead of the global string.
    aes(x = lon, y = lat, image = image),
    size = 0.075
  ) +
  labs(
    title = "Mallorca Bike Ride",
    subtitle = "Time: {frame_along}"
  ) +
  scale_color_viridis_c(option = "magma") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

# Save the animation as a gif, then read the gif back into the document.
anim_save("MallorcaBikeRide.gif")
knitr::include_graphics("MallorcaBikeRide.gif")
I prefer the animated map to the static map because it allows you to see the direction Lisa takes on her ride. The static map only shows the path, but not the direction it was traveled.
panama_swim, panama_bike, and panama_run. Create a similar map to the one you created with my cycling data. You will need to make some small changes: 1. combine the files (HINT: bind_rows(), 2. make the leading dot a different color depending on the event (for an extra challenge, make it a different image using `geom_image()!), 3. CHALLENGE (optional): color by speed, which you will need to compute on your own from the data. You can read Heather’s race report here. She is also in the Macalester Athletics Hall of Fame and still has records at the pool.
# Stack the three legs of the race into one data set (rows in the order swim,
# run, bike).
total_trail <- bind_rows(panama_swim, panama_run, panama_bike)

# Terrain base map covering the course.
# NOTE(review): Stamen-hosted tiles appear to have been retired, and newer
# ggmap releases may offer get_stadiamap() instead of get_stamenmap() --
# confirm against the installed ggmap version.
panama_map <- get_stamenmap(
  bbox = c(left = -79.56, bottom = 8.88, right = -79.41, top = 9.001),
  maptype = "terrain",
  zoom = 13
)

# Animated map: points and paths are colored by event (points also differ in
# shape) and are revealed over `time`.
ggmap(panama_map) +
  geom_point(
    data = total_trail,
    aes(x = lon, y = lat, color = event, shape = event),
    size = 2
  ) +
  geom_path(
    data = total_trail,
    aes(x = lon, y = lat, color = event),
    alpha = 0.8,
    size = 0.5
  ) +
  labs(
    title = "Ironman 70.3 Pan Am Championship",
    subtitle = "Time: {frame_along}"
  ) +
  scale_color_viridis_d(option = "magma") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

# Save the animation as a gif, then read the gif back into the document.
anim_save("ironmanpanam.gif")
knitr::include_graphics("ironmanpanam.gif")